rm(list=ls())
# library("BiocManager")
# library("devtools")
library("limma")
library("igraph")
library("DT")
library("ggplot2")
# Load gene expression, study design and survival data
load(url("https://raw.githubusercontent.com/babelomics/metabolizer/develop/metabolizer.git/script/analysis/tcga_brca_example.RData"))
# gex[1:3,1:3]
# head(designFile, 2)
# tail(designFile, 2)
# survData[1:3,1:10]
http://metabolizer.babelomics.org/
Differential metabolic activity and discovery of therapeutic targets using summarized metabolic pathway models.
Gene Expression Integration into Pathway Modules Reveals a Pan-Cancer Metabolic Landscape
# Load source code and module files of the Metabolizer
source("https://raw.githubusercontent.com/babelomics/metabolizer/develop/metabolizer.git/script/utils/functions.main.14032019.R")
load(url("https://raw.githubusercontent.com/babelomics/metabolizer/develop/metabolizer.git/script/analysis/hsa_module_data_March2019.RData"))
# Run metabolizer
results <- metabolizer(hsa_module_data,
combat.vals=gex,
output_folder=NULL, saveName=NULL,
onesample=F,
expbased=T, fluxbased=F,
default_value=0.5,
moduleinfo=url("https://github.com/babelomics/metabolizer/raw/develop/metabolizer.git/script/analysis/hsa_moduleinfo.RData"))
Module activity calculations are done successfully number of the reactions with missing values: 60
## Warning for M00067_C20825: all the reaction node values are 0.5
## The citation for the Metabolizer tool:
## -> https://www.nature.com/articles/s41540-019-0087-2
## -> doi: 10.1038/s41540-019-0087-2
# Get module activities and module annotations
moduleActivities <- results$moduleActivities[ , colnames(gex)]
moduleAnnotation <- results$moduleActivities[ , which(!colnames(results$moduleActivities) %in% colnames(gex))]
# Discard modules with zero variance
idxKeep <- which(apply(moduleActivities, 1, var)!=0)
moduleActivities <- moduleActivities[idxKeep,]
# moduleAnnotation[1:3,]
# moduleActivities[1:3,1:5]
# Fit linear model for each module
fit <- lmFit(moduleActivities, design)
# Given the linear model fit, compute statistics (p-value, fold-change ... etc)
fit <- eBayes(fit)
# Extract a table of the top-ranked genes from a linear model fit.
top <- topTable(fit,coef="TissueTumor", p.value = 0.05, sort.by = "P", adjust.method = "fdr", number = nrow(moduleActivities))
# add module annotations to toptable
idx <- match(rownames(top), rownames(results$moduleActivities))
top <- cbind(moduleAnnotation[idx, c("Module.ID", "Main.metabolic.category", "Sub.metabolic.category", "Process.description", "StartMetaboliteIDs", "StartMetaboliteNames", "EndMetaboliteIDs", "EndMetaboliteNames", "Link.to.module")], top)
top %>% datatable(extensions = list('Buttons'="NULL", 'ColReorder'="NULL"),
options = list(pageLength = 5, scrollX = TRUE, rownames = F, width = 800,
colReorder = list(realtime = TRUE), filter = "top", dom = 'Blfrtip',
buttons = list('copy', 'print',
list(extend = 'collection',
buttons = c('csv', 'excel', 'pdf'),
text = 'Download'
), I('colvis')),
#buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
lengthMenu = list(c(10,25,50,-1), c(10,25,50,"All"))
))
library(survival)
for(m in rownames(moduleActivities)){
# select module activites for tumor samples and dichotomize continuous variables
module_act <- moduleActivities[m, survData$V1[survData$V2=="Tumor"]]
quants <- quantile(module_act,probs = c(0.1,0.45,0.5,0.55,0.9))
# we used median value as threshold to generate binary variables
module_act <- as.character(ifelse(module_act < as.numeric(quants["50%"]), "DOWN", "UP"))
if(length(unique(module_act))==1){ next }
# create survival object
surv <- Surv(survData$Time, survData$Status)
# fit survival curves module activity
sfit <- survfit(surv ~ module_act)
# check if the difference between two curves is significant (p < 0.05)
my_diff <- survdiff(surv ~ module_act)
diff_pval <- 1 - pchisq(my_diff$chisq, length(my_diff$n) - 1)
if(diff_pval < 0.05){
# print(paste(m, "| p-value: ",round(diff_pval, 4)))
modulename <- moduleAnnotation$Process.description[moduleAnnotation$Module.ID==m]
plot(sfit, col = c("blue","red"), mark.time = T, main = paste0(m,"\n",modulename, "\n p-value ", round(diff_pval, 4)), cex.main= 0.7)
legend ("bottomleft", legend=c("Down","Up"), col=c("blue", "red"), lty=1:3)
}
}
library(corrplot)
# load this function: cor.mtest
cor.mtest <- function(mat, ...) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat<- matrix(NA, n, n)
diag(p.mat) <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], ...)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
p.mat
}
p.mat <- t(sigModules)
M <- cor(p.mat, method = "pearson")
M_tumor <- cor(p.mat[designFile$V2=="Tumor",], method = "pearson")
M_normal <- cor(p.mat[designFile$V2=="Normal",], method = "pearson")
# Correlation of all samples
corrplot(M, type="upper", order="original", sig.level = 0.01, p.mat = cor.mtest(p.mat), title = "All samples", mar = c(0,0,1,0), tl.cex=1, pch.cex=0.8, pch.col = "black")
# Correlation of only normal samples
corrplot(M_normal, type="upper", order="original", sig.level = 0.01, p.mat = cor.mtest(p.mat[designFile$V2=="Normal",]), title = "Normal samples", mar = c(0,0,1,0), tl.cex=1, pch.cex=0.8, pch.col = "black")
# Correlation of only tumor samples
corrplot(M_tumor, type="upper", order="original", sig.level = 0.01, p.mat = cor.mtest(p.mat[designFile$V2=="Tumor",]), title = "Tumor samples", mar = c(0,0,1,0), tl.cex=1, pch.cex=0.8, pch.col = "black")
# Run PCA
pc <- prcomp(t(sigModules), scale. = T)
# Calculate variance explained by principal component
varExp <- round(pc$sdev^2/sum(pc$sdev^2),digits = 2)
# PCA Plot
pc_df <- as.data.frame(pc$x)
pc_df$group <- as.factor(designFile$V2)
p<-ggplot(pc_df,aes(x=PC1,y=PC2,color=group))
p<-p+geom_point(size=4, alpha = 0.7)
p <- p + theme_bw() + guides(colour = guide_legend(override.aes = list(size=6)))
p <- p + theme(strip.text.x = element_text(face = "italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"),
legend.title = element_text(size=14, face="bold"),
legend.text = element_text(size=14, face="bold"))
p <- p + xlab(paste0("PC1 variance %", varExp[1]))
p <- p + ylab(paste0("PC2 variance %", varExp[2]))
p <- p + labs(col="Tissue") + theme(plot.margin = unit(c(1,1,1,1), "cm"))
p
# Visualize the strength of associations between the principal components of an omic data matrix
pc <- prcomp(sigModules, scale. = F)
var_cos2 <- pc$x * pc$x
corrplot(var_cos2[,1:6], is.corr=FALSE)